from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from firstspider.items import QiubaiItem


class QiubaiSpider(CrawlSpider):
    """Crawl qiushibaike.com text-joke listing pages and yield popular jokes.

    Links matching ``/text/page/<n>?s=<n>`` are followed from the start URL
    and each matching page is handed to :meth:`parse_item`, which yields one
    ``QiubaiItem`` per joke whose like count is at least ``min_likes``.

    NOTE: with ``CrawlSpider`` the rule callback runs only for links the rule
    extracts; the response for the start URL itself is not passed to
    ``parse_item``.
    """

    name = "QiubaiSpider"
    allowed_domains = ["qiushibaike.com"]
    download_delay = 1
    # Minimum like count a joke needs to be emitted. Kept as an attribute so
    # it can be overridden by a subclass or a spider argument
    # (``-a min_likes=...``).
    min_likes = 500
    start_urls = [
        "http://www.qiushibaike.com/text"
    ]
    rules = (
        # Raw string: ``\d`` and ``\?`` are regex escapes, not string escapes.
        Rule(
            LinkExtractor(allow=(r'/text/page/[\d]+\?s=[\d]+', )),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Yield a ``QiubaiItem`` for every sufficiently liked joke on a page.

        Args:
            response: The Scrapy response for a joke listing page.

        Yields:
            QiubaiItem: with ``like_num`` (the like count text as extracted)
            and ``joke`` (the first text node of the joke's content div).
            Jokes whose like count is missing, non-numeric, or below
            ``min_likes`` are skipped.
        """
        # Spider arguments arrive as strings, so coerce once up front.
        threshold = int(self.min_likes)

        for joke in response.xpath('//div[contains(@id, "qiushi_tag")]'):
            raw_likes = joke.xpath(
                'div[@class="stats"]/span/i/text()'
            ).extract_first()

            # extract_first() returns None when the node is absent, and the
            # text may not be numeric. Skip that joke instead of raising,
            # which would abort the rest of the page.
            try:
                like_count = int(raw_likes)
            except (TypeError, ValueError):
                self.logger.debug(
                    "Skipping joke with unparsable like count %r on %s",
                    raw_likes, response.url,
                )
                continue

            if like_count < threshold:
                continue

            item = QiubaiItem()
            item['like_num'] = raw_likes
            item['joke'] = joke.xpath(
                'div[@class="content"]/text()'
            ).extract_first()
            yield item

